{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: requests-html in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (0.10.0)\n",
      "Requirement already satisfied: bs4 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (0.0.1)\n",
      "Requirement already satisfied: pyppeteer>=0.0.14 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.0.2)\n",
      "Requirement already satisfied: parse in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.19.0)\n",
      "Requirement already satisfied: fake-useragent in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.1.3)\n",
      "Requirement already satisfied: requests in c:\\programdata\\anaconda3\\lib\\site-packages (from requests-html) (2.27.1)\n",
      "Requirement already satisfied: w3lib in c:\\programdata\\anaconda3\\lib\\site-packages (from requests-html) (1.21.0)\n",
      "Requirement already satisfied: pyquery in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (2.0.0)\n",
      "Requirement already satisfied: appdirs<2.0.0,>=1.4.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.4.4)\n",
      "Requirement already satisfied: pyee<9.0.0,>=8.1.0 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from pyppeteer>=0.0.14->requests-html) (8.2.2)\n",
      "Requirement already satisfied: certifi>=2021 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (2021.10.8)\n",
      "Requirement already satisfied: websockets<11.0,>=10.0 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from pyppeteer>=0.0.14->requests-html) (10.4)\n",
      "Requirement already satisfied: importlib-metadata>=1.4 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (4.11.3)\n",
      "Requirement already satisfied: urllib3<2.0.0,>=1.25.8 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.26.9)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.42.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (4.64.0)\n",
      "Requirement already satisfied: zipp>=0.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from importlib-metadata>=1.4->pyppeteer>=0.0.14->requests-html) (3.7.0)\n",
      "Requirement already satisfied: colorama in c:\\programdata\\anaconda3\\lib\\site-packages (from tqdm<5.0.0,>=4.42.1->pyppeteer>=0.0.14->requests-html) (0.4.4)\n",
      "Requirement already satisfied: beautifulsoup4 in c:\\programdata\\anaconda3\\lib\\site-packages (from bs4->requests-html) (4.11.1)\n",
      "Requirement already satisfied: soupsieve>1.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from beautifulsoup4->bs4->requests-html) (2.3.1)\n",
      "Requirement already satisfied: importlib-resources>=5.0 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from fake-useragent->requests-html) (5.12.0)\n",
      "Requirement already satisfied: cssselect>=1.2.0 in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from pyquery->requests-html) (1.2.0)\n",
      "Requirement already satisfied: lxml>=2.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyquery->requests-html) (4.8.0)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->requests-html) (2.0.4)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->requests-html) (3.3)\n",
      "Requirement already satisfied: six>=1.4.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from w3lib->requests-html) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install requests-html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie XSRF-TOKEN=zIARnRM0QQaLudJk_4OBXw; __gc_id=e936feed5bd343e5a73b304fa0d4ad5f; _ga=GA1.1.504776059.1697021947; __uuid=1697021948310.51; __tlog=1697021948354.48%7C00000000%7C00000000%7C00000000%7C00000000; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1697021951; acw_tc=2760828416970219810274366e53a098388abcfc886c89f5cbf2332344893f; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1697021975; __session_seq=4; __uv_seq=4; __tlg_event_seq=24; _ga_54YTJKWN86=GS1.1.1697021946.1.1.1697023027.0.0.0\n",
      "这是第1页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待7秒...然后以继续抓取\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.advViewFlag</th>\n",
       "      <th>job.link</th>\n",
       "      <th>job.dq</th>\n",
       "      <th>job.title</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>job.refreshTime</th>\n",
       "      <th>job.jobKind</th>\n",
       "      <th>...</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>job.pcOuterLink</th>\n",
       "      <th>job.h5OuterLink</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>job.campusJobKind</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...</td>\n",
       "      <td>{\"recruiterName\":\"李先生\",\"imId\":\"3f588040b06a7b3...</td>\n",
       "      <td>[广告策划, 新媒体]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/a/49462105.shtml</td>\n",
       "      <td>深圳</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>20230916093155</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td></td>\n",
       "      <td>某深圳云计算/大数据公司</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>云计算/大数据</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...</td>\n",
       "      <td>{\"recruiterName\":\"倪先生\",\"imId\":\"98379e69c2eee69...</td>\n",
       "      <td>[广告策划]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/a/50016611.shtml</td>\n",
       "      <td>深圳-福田区</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>20231102191952</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td></td>\n",
       "      <td>某深圳互联网公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>互联网</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...</td>\n",
       "      <td>{\"recruiterName\":\"张女士\",\"imId\":\"ab85de58bc41b60...</td>\n",
       "      <td>[广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/a/49511733.shtml</td>\n",
       "      <td>深圳</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>20230919093804</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td></td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...</td>\n",
       "      <td>{\"recruiterName\":\"叶先生\",\"imId\":\"2e7803cd11f19d9...</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/a/49454013.shtml</td>\n",
       "      <td>深圳-福田区</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>20230915160702</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td></td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...</td>\n",
       "      <td>{\"recruiterName\":\"李先生\",\"imId\":\"f8f6775be4aaadf...</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/a/49570391.shtml</td>\n",
       "      <td>深圳</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>20231014200408</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td></td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"recruiterPhoto\":\"617aa8473dba297879e635a302u...</td>\n",
       "      <td>[3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/lptjob/62579301</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>海外市场实习生(J11029)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20231031114858</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>石头科技</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"recruiterPhoto\":\"5f8f9866dfb13a7dee342f1808u...</td>\n",
       "      <td>[3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/lptjob/62520763</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>海外市场实习生(J11025)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20231027151058</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>石头科技</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"recruiterPhoto\":\"617aa8473dba297879e635a302u...</td>\n",
       "      <td>[3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/lptjob/62438217</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>海外市场实习生(J11025)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20231024163614</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>石头科技</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"recruiterPhoto\":\"5f8f9868f6d1ab58476f24a008u...</td>\n",
       "      <td>[本科, 市场营销]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/lptjob/61734531</td>\n",
       "      <td>深圳</td>\n",
       "      <td>数字化市场运营岗</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20230918224323</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5bfea5f974719d2aa34cef7003a.png</td>\n",
       "      <td>https://www.liepin.com/company/2034027/</td>\n",
       "      <td>中国联通广东省分公司</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>运营商/增值服务</td>\n",
       "      <td>https://atsc.liepin.com/ats/apply-form/?jobId=...</td>\n",
       "      <td>https://matsc.liepin.com/ats/apply-form/?jobId...</td>\n",
       "      <td>2034027.0</td>\n",
       "      <td>应届</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"recruiterPhoto\":\"5f8f986779c7cc70efbf36c008u...</td>\n",
       "      <td>[本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]</td>\n",
       "      <td>False</td>\n",
       "      <td>https://www.liepin.com/lptjob/61666977</td>\n",
       "      <td>深圳-坪山区</td>\n",
       "      <td>品牌岗（新媒体及文案方向） (24届应届生)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20231113143544</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>63b40735895ec0179bd60f2507u.png</td>\n",
       "      <td>https://www.liepin.com/company/13261319/</td>\n",
       "      <td>深圳市华朗学校</td>\n",
       "      <td></td>\n",
       "      <td>学校教育</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>13261319.0</td>\n",
       "      <td>应届</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...   \n",
       "1   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...   \n",
       "2   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...   \n",
       "3   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...   \n",
       "4   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22page...   \n",
       "..                                                ...   \n",
       "35  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "36  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "37  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "38  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "39  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"recruiterName\":\"李先生\",\"imId\":\"3f588040b06a7b3...   \n",
       "1   {\"recruiterName\":\"倪先生\",\"imId\":\"98379e69c2eee69...   \n",
       "2   {\"recruiterName\":\"张女士\",\"imId\":\"ab85de58bc41b60...   \n",
       "3   {\"recruiterName\":\"叶先生\",\"imId\":\"2e7803cd11f19d9...   \n",
       "4   {\"recruiterName\":\"李先生\",\"imId\":\"f8f6775be4aaadf...   \n",
       "..                                                ...   \n",
       "35  {\"recruiterPhoto\":\"617aa8473dba297879e635a302u...   \n",
       "36  {\"recruiterPhoto\":\"5f8f9866dfb13a7dee342f1808u...   \n",
       "37  {\"recruiterPhoto\":\"617aa8473dba297879e635a302u...   \n",
       "38  {\"recruiterPhoto\":\"5f8f9868f6d1ab58476f24a008u...   \n",
       "39  {\"recruiterPhoto\":\"5f8f986779c7cc70efbf36c008u...   \n",
       "\n",
       "                                           job.labels  job.advViewFlag  \\\n",
       "0                                         [广告策划, 新媒体]            False   \n",
       "1                                              [广告策划]            False   \n",
       "2                  [广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]            False   \n",
       "3                                                  []            False   \n",
       "4                                                  []            False   \n",
       "..                                                ...              ...   \n",
       "35  [3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...            False   \n",
       "36  [3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...            False   \n",
       "37  [3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...            False   \n",
       "38                                         [本科, 市场营销]            False   \n",
       "39   [本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]            False   \n",
       "\n",
       "                                   job.link  job.dq               job.title  \\\n",
       "0   https://www.liepin.com/a/49462105.shtml      深圳                    广告策划   \n",
       "1   https://www.liepin.com/a/50016611.shtml  深圳-福田区                    广告策划   \n",
       "2   https://www.liepin.com/a/49511733.shtml      深圳                    广告策划   \n",
       "3   https://www.liepin.com/a/49454013.shtml  深圳-福田区                    广告策划   \n",
       "4   https://www.liepin.com/a/49570391.shtml      深圳                    广告策划   \n",
       "..                                      ...     ...                     ...   \n",
       "35   https://www.liepin.com/lptjob/62579301  深圳-南山区         海外市场实习生(J11029)   \n",
       "36   https://www.liepin.com/lptjob/62520763  深圳-南山区         海外市场实习生(J11025)   \n",
       "37   https://www.liepin.com/lptjob/62438217  深圳-南山区         海外市场实习生(J11025)   \n",
       "38   https://www.liepin.com/lptjob/61734531      深圳                数字化市场运营岗   \n",
       "39   https://www.liepin.com/lptjob/61666977  深圳-坪山区  品牌岗（新媒体及文案方向） (24届应届生)   \n",
       "\n",
       "   job.requireWorkYears job.refreshTime job.jobKind  ... comp.compStage  \\\n",
       "0                  3-5年  20230916093155           1  ...          融资未公开   \n",
       "1                  3-5年  20231102191952           1  ...          不需要融资   \n",
       "2                  3-5年  20230919093804           1  ...            NaN   \n",
       "3                  3-5年  20230915160702           1  ...            NaN   \n",
       "4                  经验不限  20231014200408           1  ...            NaN   \n",
       "..                  ...             ...         ...  ...            ...   \n",
       "35                  NaN  20231031114858           6  ...         沪深A股上市   \n",
       "36                  NaN  20231027151058           6  ...         沪深A股上市   \n",
       "37                  NaN  20231024163614           6  ...         沪深A股上市   \n",
       "38                  NaN  20230918224323           6  ...            NaN   \n",
       "39                  NaN  20231113143544           6  ...            NaN   \n",
       "\n",
       "                      comp.compLogo                                 comp.link  \\\n",
       "0   60e2fe0bf3df194a3c48adb502u.png                                             \n",
       "1   60e2fe0bf3df194a3c48adb502u.png                                             \n",
       "2   60e2fe0bf3df194a3c48adb502u.png                                             \n",
       "3   60e2fe0bf3df194a3c48adb502u.png                                             \n",
       "4   60e2fe0bf3df194a3c48adb502u.png                                             \n",
       "..                              ...                                       ...   \n",
       "35  5d06ef909fae687d4390e99b07u.png   https://www.liepin.com/company/9322029/   \n",
       "36  5d06ef909fae687d4390e99b07u.png   https://www.liepin.com/company/9322029/   \n",
       "37  5d06ef909fae687d4390e99b07u.png   https://www.liepin.com/company/9322029/   \n",
       "38  5bfea5f974719d2aa34cef7003a.png   https://www.liepin.com/company/2034027/   \n",
       "39  63b40735895ec0179bd60f2507u.png  https://www.liepin.com/company/13261319/   \n",
       "\n",
       "   comp.compName comp.compScale comp.compIndustry  \\\n",
       "0   某深圳云计算/大数据公司       100-499人           云计算/大数据   \n",
       "1       某深圳互联网公司          1-49人               互联网   \n",
       "2      某深圳咨询服务公司         50-99人              咨询服务   \n",
       "3      某深圳咨询服务公司         50-99人              咨询服务   \n",
       "4      某深圳咨询服务公司         50-99人              咨询服务   \n",
       "..           ...            ...               ...   \n",
       "35          石头科技     1000-2000人              智能硬件   \n",
       "36          石头科技     1000-2000人              智能硬件   \n",
       "37          石头科技     1000-2000人              智能硬件   \n",
       "38    中国联通广东省分公司       10000人以上          运营商/增值服务   \n",
       "39       深圳市华朗学校                             学校教育   \n",
       "\n",
       "                                      job.pcOuterLink  \\\n",
       "0                                                 NaN   \n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "..                                                ...   \n",
       "35                                                      \n",
       "36                                                      \n",
       "37                                                      \n",
       "38  https://atsc.liepin.com/ats/apply-form/?jobId=...   \n",
       "39                                                      \n",
       "\n",
       "                                      job.h5OuterLink comp.compId  \\\n",
       "0                                                 NaN         NaN   \n",
       "1                                                 NaN         NaN   \n",
       "2                                                 NaN         NaN   \n",
       "3                                                 NaN         NaN   \n",
       "4                                                 NaN         NaN   \n",
       "..                                                ...         ...   \n",
       "35                                                      9322029.0   \n",
       "36                                                      9322029.0   \n",
       "37                                                      9322029.0   \n",
       "38  https://matsc.liepin.com/ats/apply-form/?jobId...   2034027.0   \n",
       "39                                                     13261319.0   \n",
       "\n",
       "    job.campusJobKind  \n",
       "0                 NaN  \n",
       "1                 NaN  \n",
       "2                 NaN  \n",
       "3                 NaN  \n",
       "4                 NaN  \n",
       "..                ...  \n",
       "35                 实习  \n",
       "36                 实习  \n",
       "37                 实习  \n",
       "38                 应届  \n",
       "39                 应届  \n",
       "\n",
       "[400 rows x 32 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 猎聘 没有设置登陆cookie，仅可抓取10页\n",
    "import crawl_liepin\n",
    "\n",
    "crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='应届生')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. cookies实现登陆，获取完整数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 建立登陆cookie\n",
    "cookie = \"inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\"\n",
    "cookie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\n",
      "这是第1页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待5秒...然后以继续抓取\n"
     ]
    }
   ],
   "source": [
    "import crawl_liepin\n",
    "\n",
    "广告策划_深圳_results = crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='',登录cookie=cookie)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 使用数据分析进行数据交互式可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method DataFrame.to_html of                                              dataInfo  \\\n",
       "0   %7B%22jobKind%22%3A%221%22%2C%22pageSize%22%3A...   \n",
       "1   %7B%22jobKind%22%3A%221%22%2C%22pageSize%22%3A...   \n",
       "2   %7B%22jobKind%22%3A%221%22%2C%22pageSize%22%3A...   \n",
       "3   %7B%22jobKind%22%3A%221%22%2C%22pageSize%22%3A...   \n",
       "4   %7B%22jobKind%22%3A%221%22%2C%22pageSize%22%3A...   \n",
       "..                                                ...   \n",
       "35  %7B%22scene%22%3A%22condition%22%2C%22skId%22%...   \n",
       "36  %7B%22scene%22%3A%22condition%22%2C%22skId%22%...   \n",
       "37  %7B%22scene%22%3A%22condition%22%2C%22skId%22%...   \n",
       "38  %7B%22scene%22%3A%22condition%22%2C%22skId%22%...   \n",
       "39  %7B%22scene%22%3A%22condition%22%2C%22skId%22%...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"recruiterName\":\"李先生\",\"imId\":\"3f588040b06a7b3...   \n",
       "1   {\"recruiterName\":\"倪先生\",\"imId\":\"98379e69c2eee69...   \n",
       "2   {\"recruiterName\":\"张女士\",\"imId\":\"ab85de58bc41b60...   \n",
       "3   {\"recruiterName\":\"叶先生\",\"imId\":\"2e7803cd11f19d9...   \n",
       "4   {\"recruiterName\":\"李先生\",\"imId\":\"f8f6775be4aaadf...   \n",
       "..                                                ...   \n",
       "35  {\"jobKind\":\"6\",\"recruiterName\":\"林女士\",\"userId\":...   \n",
       "36  {\"jobKind\":\"6\",\"recruiterName\":\"赵女士\",\"userId\":...   \n",
       "37  {\"jobKind\":\"6\",\"recruiterName\":\"林女士\",\"userId\":...   \n",
       "38  {\"jobKind\":\"6\",\"recruiterName\":\"高先生\",\"userId\":...   \n",
       "39  {\"jobKind\":\"6\",\"recruiterName\":\"许女士\",\"userId\":...   \n",
       "\n",
       "                                           job.labels  \\\n",
       "0                                         [广告策划, 新媒体]   \n",
       "1                                              [广告策划]   \n",
       "2                  [广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]   \n",
       "3                                                  []   \n",
       "4                                                  []   \n",
       "..                                                ...   \n",
       "35  [3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...   \n",
       "36  [3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...   \n",
       "37  [3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...   \n",
       "38                                         [本科, 市场营销]   \n",
       "39   [本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]   \n",
       "\n",
       "                                   job.link               job.title  job.dq  \\\n",
       "0   https://www.liepin.com/a/49462105.shtml                    广告策划      深圳   \n",
       "1   https://www.liepin.com/a/50016611.shtml                    广告策划  深圳-福田区   \n",
       "2   https://www.liepin.com/a/49511733.shtml                    广告策划      深圳   \n",
       "3   https://www.liepin.com/a/49454013.shtml                    广告策划  深圳-福田区   \n",
       "4   https://www.liepin.com/a/49570391.shtml                    广告策划      深圳   \n",
       "..                                      ...                     ...     ...   \n",
       "35   https://www.liepin.com/lptjob/62579301         海外市场实习生(J11029)  深圳-南山区   \n",
       "36   https://www.liepin.com/lptjob/62520763         海外市场实习生(J11025)  深圳-南山区   \n",
       "37   https://www.liepin.com/lptjob/62438217         海外市场实习生(J11025)  深圳-南山区   \n",
       "38   https://www.liepin.com/lptjob/61734531                数字化市场运营岗      深圳   \n",
       "39   https://www.liepin.com/lptjob/61666977  品牌岗（新媒体及文案方向） (24届应届生)  深圳-坪山区   \n",
       "\n",
       "   job.jobKind job.refreshTime  job.topJob job.jobId  ...  \\\n",
       "0            1  20230916093155       False  49462105  ...   \n",
       "1            1  20231102191952       False  50016611  ...   \n",
       "2            1  20230919093804       False  49511733  ...   \n",
       "3            1  20230915160702       False  49454013  ...   \n",
       "4            1  20231014200408       False  49570391  ...   \n",
       "..         ...             ...         ...       ...  ...   \n",
       "35           6  20231031114858       False  62579301  ...   \n",
       "36           6  20231027151058       False  62520763  ...   \n",
       "37           6  20231024163614       False  62438217  ...   \n",
       "38           6  20230918224323       False  61734531  ...   \n",
       "39           6  20231113143544       False  61666977  ...   \n",
       "\n",
       "                                   comp.link  comp.compStage  \\\n",
       "0                                                      融资未公开   \n",
       "1                                                      不需要融资   \n",
       "2                                                        NaN   \n",
       "3                                                        NaN   \n",
       "4                                                        NaN   \n",
       "..                                       ...             ...   \n",
       "35   https://www.liepin.com/company/9322029/          沪深A股上市   \n",
       "36   https://www.liepin.com/company/9322029/          沪深A股上市   \n",
       "37   https://www.liepin.com/company/9322029/          沪深A股上市   \n",
       "38   https://www.liepin.com/company/2034027/             NaN   \n",
       "39  https://www.liepin.com/company/13261319/             NaN   \n",
       "\n",
       "                      comp.compLogo comp.compName comp.compScale  \\\n",
       "0   60e2fe0bf3df194a3c48adb502u.png  某深圳云计算/大数据公司       100-499人   \n",
       "1   60e2fe0bf3df194a3c48adb502u.png      某深圳互联网公司          1-49人   \n",
       "2   60e2fe0bf3df194a3c48adb502u.png     某深圳咨询服务公司         50-99人   \n",
       "3   60e2fe0bf3df194a3c48adb502u.png     某深圳咨询服务公司         50-99人   \n",
       "4   60e2fe0bf3df194a3c48adb502u.png     某深圳咨询服务公司         50-99人   \n",
       "..                              ...           ...            ...   \n",
       "35  5d06ef909fae687d4390e99b07u.png          石头科技     1000-2000人   \n",
       "36  5d06ef909fae687d4390e99b07u.png          石头科技     1000-2000人   \n",
       "37  5d06ef909fae687d4390e99b07u.png          石头科技     1000-2000人   \n",
       "38  5bfea5f974719d2aa34cef7003a.png    中国联通广东省分公司       10000人以上   \n",
       "39  63b40735895ec0179bd60f2507u.png       深圳市华朗学校                  \n",
       "\n",
       "   comp.compIndustry                                    job.pcOuterLink  \\\n",
       "0            云计算/大数据                                                NaN   \n",
       "1                互联网                                                NaN   \n",
       "2               咨询服务                                                NaN   \n",
       "3               咨询服务                                                NaN   \n",
       "4               咨询服务                                                NaN   \n",
       "..               ...                                                ...   \n",
       "35              智能硬件                                                      \n",
       "36              智能硬件                                                      \n",
       "37              智能硬件                                                      \n",
       "38          运营商/增值服务  https://atsc.liepin.com/ats/apply-form/?jobId=...   \n",
       "39              学校教育                                                      \n",
       "\n",
       "                                      job.h5OuterLink comp.compId  \\\n",
       "0                                                 NaN         NaN   \n",
       "1                                                 NaN         NaN   \n",
       "2                                                 NaN         NaN   \n",
       "3                                                 NaN         NaN   \n",
       "4                                                 NaN         NaN   \n",
       "..                                                ...         ...   \n",
       "35                                                      9322029.0   \n",
       "36                                                      9322029.0   \n",
       "37                                                      9322029.0   \n",
       "38  https://matsc.liepin.com/ats/apply-form/?jobId...   2034027.0   \n",
       "39                                                     13261319.0   \n",
       "\n",
       "    job.campusJobKind  \n",
       "0                 NaN  \n",
       "1                 NaN  \n",
       "2                 NaN  \n",
       "3                 NaN  \n",
       "4                 NaN  \n",
       "..                ...  \n",
       "35                 实习  \n",
       "36                 实习  \n",
       "37                 实习  \n",
       "38                 应届  \n",
       "39                 应届  \n",
       "\n",
       "[400 rows x 32 columns]>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "广告策划_深圳_results.to_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['dataInfo', 'dataParams', 'job.labels', 'job.link', 'job.title',\n",
       "       'job.dq', 'job.jobKind', 'job.refreshTime', 'job.topJob', 'job.jobId',\n",
       "       'job.salary', 'job.advViewFlag', 'job.requireWorkYears',\n",
       "       'job.requireEduLevel', 'job.dataPromId', 'recruiter.recruiterName',\n",
       "       'recruiter.recruiterTitle', 'recruiter.imId', 'recruiter.imUserType',\n",
       "       'recruiter.chatted', 'recruiter.recruiterId',\n",
       "       'recruiter.recruiterPhoto', 'comp.link', 'comp.compStage',\n",
       "       'comp.compLogo', 'comp.compName', 'comp.compScale', 'comp.compIndustry',\n",
       "       'job.pcOuterLink', 'job.h5OuterLink', 'comp.compId',\n",
       "       'job.campusJobKind'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "广告策划_深圳_results.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "深圳        140\n",
       "深圳-南山区     70\n",
       "深圳-福田区     58\n",
       "深圳-宝安区     44\n",
       "深圳-龙岗区     40\n",
       "深圳-龙华区     25\n",
       "深圳-罗湖区     13\n",
       "深圳-坪山区      5\n",
       "深圳-光明区      4\n",
       "深圳-盐田区      1\n",
       "Name: job.dq, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq = 广告策划_深圳_results['job.dq'].value_counts()\n",
    "series_dq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['深圳-南山区',\n",
       " '深圳-福田区',\n",
       " '深圳-宝安区',\n",
       " '深圳-龙岗区',\n",
       " '深圳-龙华区',\n",
       " '深圳-罗湖区',\n",
       " '深圳-坪山区',\n",
       " '深圳-光明区',\n",
       " '深圳-盐田区']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dq_name= [i for i in series_dq.index if '-' in i]\n",
    "dq_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([70, 58, 44, 40, 25, 13,  5,  4,  1], dtype=int64)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq[dq_name].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: pyecharts in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (2.0.3)\n",
      "Requirement already satisfied: prettytable in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from pyecharts) (3.7.0)\n",
      "Requirement already satisfied: simplejson in c:\\users\\32617\\appdata\\roaming\\python\\python39\\site-packages (from pyecharts) (3.19.1)\n",
      "Requirement already satisfied: jinja2 in c:\\programdata\\anaconda3\\lib\\site-packages (from pyecharts) (2.11.3)\n",
      "Requirement already satisfied: MarkupSafe>=0.23 in c:\\programdata\\anaconda3\\lib\\site-packages (from jinja2->pyecharts) (2.0.1)\n",
      "Requirement already satisfied: wcwidth in c:\\programdata\\anaconda3\\lib\\site-packages (from prettytable->pyecharts) (0.2.5)\n"
     ]
    }
   ],
   "source": [
    "!pip install pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Map\n",
    "from pyecharts.faker import Faker\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['南山区', '福田区', '宝安区', '龙岗区', '龙华区', '罗湖区', '坪山区', '光明区', '盐田区']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[i.split('-')[1] for i in dq_name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[70, 58, 44, 40, 25, 13, 5, 4, 1]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq[dq_name].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts':'https://assets.pyecharts.org/assets/v5/echarts.min', '深圳':'https://assets.pyecharts.org/assets/v5/maps/guang3_dong1_shen1_zhen4'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "\n",
       "        <div id=\"7395483eb3f34e0b9a7b0f81802aa520\" style=\"width:900px; height:500px;\"></div>\n",
       "\n",
       "<script>\n",
       "        require(['echarts', '深圳'], function(echarts) {\n",
       "                var chart_7395483eb3f34e0b9a7b0f81802aa520 = echarts.init(\n",
       "                    document.getElementById('7395483eb3f34e0b9a7b0f81802aa520'), 'white', {renderer: 'canvas'});\n",
       "                var option_7395483eb3f34e0b9a7b0f81802aa520 = {\n",
       "    \"animation\": true,\n",
       "    \"animationThreshold\": 2000,\n",
       "    \"animationDuration\": 1000,\n",
       "    \"animationEasing\": \"cubicOut\",\n",
       "    \"animationDelay\": 0,\n",
       "    \"animationDurationUpdate\": 300,\n",
       "    \"animationEasingUpdate\": \"cubicOut\",\n",
       "    \"animationDelayUpdate\": 0,\n",
       "    \"aria\": {\n",
       "        \"enabled\": false\n",
       "    },\n",
       "    \"color\": [\n",
       "        \"#5470c6\",\n",
       "        \"#91cc75\",\n",
       "        \"#fac858\",\n",
       "        \"#ee6666\",\n",
       "        \"#73c0de\",\n",
       "        \"#3ba272\",\n",
       "        \"#fc8452\",\n",
       "        \"#9a60b4\",\n",
       "        \"#ea7ccc\"\n",
       "    ],\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"map\",\n",
       "            \"name\": \"\\u5e7f\\u544a\\u7b56\\u5212\\u5730\\u533a\\u5206\\u5e03\",\n",
       "            \"label\": {\n",
       "                \"show\": true,\n",
       "                \"margin\": 8\n",
       "            },\n",
       "            \"map\": \"\\u6df1\\u5733\",\n",
       "            \"data\": [\n",
       "                {\n",
       "                    \"name\": \"\\u5357\\u5c71\\u533a\",\n",
       "                    \"value\": 70\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u798f\\u7530\\u533a\",\n",
       "                    \"value\": 58\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5b9d\\u5b89\\u533a\",\n",
       "                    \"value\": 44\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9f99\\u5c97\\u533a\",\n",
       "                    \"value\": 40\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9f99\\u534e\\u533a\",\n",
       "                    \"value\": 25\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7f57\\u6e56\\u533a\",\n",
       "                    \"value\": 13\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u576a\\u5c71\\u533a\",\n",
       "                    \"value\": 5\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5149\\u660e\\u533a\",\n",
       "                    \"value\": 4\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u76d0\\u7530\\u533a\",\n",
       "                    \"value\": 1\n",
       "                }\n",
       "            ],\n",
       "            \"roam\": true,\n",
       "            \"aspectScale\": 0.75,\n",
       "            \"nameProperty\": \"name\",\n",
       "            \"selectedMode\": false,\n",
       "            \"zoom\": 1,\n",
       "            \"zlevel\": 0,\n",
       "            \"z\": 2,\n",
       "            \"seriesLayoutBy\": \"column\",\n",
       "            \"datasetIndex\": 0,\n",
       "            \"mapValueCalculation\": \"sum\",\n",
       "            \"showLegendSymbol\": true,\n",
       "            \"emphasis\": {}\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"\\u5e7f\\u544a\\u7b56\\u5212\\u5730\\u533a\\u5206\\u5e03\"\n",
       "            ],\n",
       "            \"selected\": {},\n",
       "            \"show\": true,\n",
       "            \"padding\": 5,\n",
       "            \"itemGap\": 10,\n",
       "            \"itemWidth\": 25,\n",
       "            \"itemHeight\": 14,\n",
       "            \"backgroundColor\": \"transparent\",\n",
       "            \"borderColor\": \"#ccc\",\n",
       "            \"borderWidth\": 1,\n",
       "            \"borderRadius\": 0,\n",
       "            \"pageButtonItemGap\": 5,\n",
       "            \"pageButtonPosition\": \"end\",\n",
       "            \"pageFormatter\": \"{current}/{total}\",\n",
       "            \"pageIconColor\": \"#2f4554\",\n",
       "            \"pageIconInactiveColor\": \"#aaa\",\n",
       "            \"pageIconSize\": 15,\n",
       "            \"animationDurationUpdate\": 800,\n",
       "            \"selector\": false,\n",
       "            \"selectorPosition\": \"auto\",\n",
       "            \"selectorItemGap\": 7,\n",
       "            \"selectorButtonGap\": 10\n",
       "        }\n",
       "    ],\n",
       "    \"tooltip\": {\n",
       "        \"show\": true,\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"showContent\": true,\n",
       "        \"alwaysShowContent\": false,\n",
       "        \"showDelay\": 0,\n",
       "        \"hideDelay\": 100,\n",
       "        \"enterable\": false,\n",
       "        \"confine\": false,\n",
       "        \"appendToBody\": false,\n",
       "        \"transitionDuration\": 0.4,\n",
       "        \"textStyle\": {\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"borderWidth\": 0,\n",
       "        \"padding\": 5,\n",
       "        \"order\": \"seriesAsc\"\n",
       "    },\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"show\": true,\n",
       "            \"text\": \"Map-\\u6df1\\u5733-\\u5e7f\\u544a\\u7b56\\u5212\",\n",
       "            \"target\": \"blank\",\n",
       "            \"subtarget\": \"blank\",\n",
       "            \"padding\": 5,\n",
       "            \"itemGap\": 10,\n",
       "            \"textAlign\": \"auto\",\n",
       "            \"textVerticalAlign\": \"auto\",\n",
       "            \"triggerEvent\": false\n",
       "        }\n",
       "    ],\n",
       "    \"visualMap\": {\n",
       "        \"show\": true,\n",
       "        \"type\": \"continuous\",\n",
       "        \"min\": 0,\n",
       "        \"max\": 100,\n",
       "        \"inRange\": {\n",
       "            \"color\": [\n",
       "                \"#50a3ba\",\n",
       "                \"#eac763\",\n",
       "                \"#d94e5d\"\n",
       "            ]\n",
       "        },\n",
       "        \"calculable\": true,\n",
       "        \"inverse\": false,\n",
       "        \"splitNumber\": 5,\n",
       "        \"hoverLink\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"padding\": 5,\n",
       "        \"showLabel\": true,\n",
       "        \"itemWidth\": 20,\n",
       "        \"itemHeight\": 140,\n",
       "        \"borderWidth\": 0\n",
       "    }\n",
       "};\n",
       "                chart_7395483eb3f34e0b9a7b0f81802aa520.setOption(option_7395483eb3f34e0b9a7b0f81802aa520);\n",
       "        });\n",
       "    </script>\n"
      ],
      "text/plain": [
       "<pyecharts.render.display.HTML at 0x1f3a3a90e50>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Map\n",
    "from pyecharts.faker import Faker\n",
    "\n",
    "c = (\n",
    "    Map()\n",
    "    .add(\"广告策划地区分布\", [list(z) for z in zip([i.split('-')[1] for i in dq_name],series_dq[dq_name].values.tolist())], \"深圳\")\n",
    "    .set_global_opts(\n",
    "        title_opts=opts.TitleOpts(title=\"Map-深圳-广告策划\"), visualmap_opts=opts.VisualMapOpts()\n",
    "    )\n",
    "    \n",
    ")\n",
    "c.render_notebook()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\n",
      "这是第1页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待3秒...然后以继续抓取\n"
     ]
    }
   ],
   "source": [
    "import crawl_liepin  \n",
    "  \n",
    "广告策划_深圳_results = crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='',登录cookie=cookie)  \n",
    "  \n",
    "# 将DataFrame保存为Excel文件  \n",
    "广告策划_深圳_results.to_excel(\"广告策划_深圳_results.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
